/********************************************************************
 Replication code: BD-LIFT and Women's Autonomy in Healthcare
 Paper: "From Connectivity to Care: How Digital Inclusion Shapes
         Women's Healthcare Decision-Making in Bangladesh"
 Conference: 12th International Conference on Next Generation
             Computing, Communication, Systems and Security (NSysS 2025)
 Venue: Sylhet, Bangladesh

 Author: Muhammad Hassan Bin Afzal
 Affiliation: Department of Applied Statistics and Data Science,
              Jahangirnagar University, Bangladesh
 Email: mafzal@kent.edu

 Data source:
   - Custom analytic dataset: bdlift_autonomy_custom.dta
   - Derived from BDHS 2022 Individual Recode (IR),
     obtained from The DHS Program (https://dhsprogram.com)
     under a standard data use agreement.
   - All original DHS identifiers and full microdata structure
     were removed. Only de-identified, model-ready variables
     are included here.

 Notes on weights & clusters:
   - The survey weight wt was computed from the original DHS
     weight (v005) and stored as wt in this custom dataset.
   - The multilevel cluster structure was built from the DHS
     cluster ID (v001). Here we only retain an anonymized
     cluster_id created via egen group(), which preserves the
     nesting but does not expose original DHS identifiers.
   - All survey-design work (linking wt and cluster structure
     to BDHS) was done before constructing this custom file,
     in full respect of DHS licensing and ethical rules.
********************************************************************/

clear all
set more off

* 1. Load the custom analytic dataset
//use "bdlift_autonomy_custom.dta", clear

* Quick check of structure
describe
//summarize autonomy_say bdlift age wealth education is_working residence wt

* (Optional) If you want to use svyset in extensions, you can define:
* svyset cluster_id [pweight=wt], vce(linearized)

*==============================================================*
* PART A: Multilevel Models (as in the NSysS 2025 paper)
*==============================================================*

* Define complete-case sample used for modeling
gen byte complete_case = !missing(autonomy_say, bdlift, residence, ///
                                  wealth, education, age, is_working, ///
                                  cluster_id, wt)
label var complete_case "Complete-case indicator for main model"

* Model 1: Bivariate (BD-LIFT only)
melogit autonomy_say i.bdlift [pweight=wt] if complete_case, ///
    || cluster_id:, or
estimates store Model1

* Model 2: BD-LIFT with BD-LIFT × residence interaction
melogit autonomy_say i.bdlift##i.residence [pweight=wt] if complete_case, ///
    || cluster_id:, or
estimates store Model2

* Model 3: Full model with controls
melogit autonomy_say i.bdlift##i.residence ///
                     i.wealth i.education i.age i.is_working ///
                     [pweight=wt] if complete_case, ///
    || cluster_id:, or
estimates store Model3

* Side-by-side table (requires esttab from estout package)
esttab Model1 Model2 Model3, eform ///
    cells(b(star fmt(3)) se(par fmt(3))) ///
    stats(N ll bic aic r2_p, fmt(0 3 3 3 3) ///
          labels("Observations" "Log likelihood" "BIC" "AIC" "Pseudo R-squared")) ///
    legend collabels(none) varlabels(_cons "Constant") ///
    starlevels(* 0.05 ** 0.01 *** 0.001)

*==============================================================*
* PART B: ICC and Random Effects
*==============================================================*

* Intraclass Correlation Coefficient (from Model 3)
estat icc

* Random effects (cluster-level)
predict re_cluster, reffects
label var re_cluster "Random intercept (cluster-level, log-odds)"

sort re_cluster
gen cluster_rank = _n
label var cluster_rank "Cluster rank by random effect"

twoway (scatter re_cluster cluster_rank, msymbol(o) msize(small)) ///
       , ///
       yline(0, lpattern(dash)) ///
       xtitle("Communities/Clusters (sorted)") ///
       ytitle("Random Effect (log-odds)") ///
       title("Caterpillar Plot: Cluster-Level Variation") ///
       legend(off)

* Histogram of random effects
histogram re_cluster, normal ///
    xtitle("Random Effect (log-odds)") ///
    ytitle("Number of Communities") ///
    title("Distribution of Cluster-Level Effects")

*==============================================================*
* PART C: ROC Curve and Calibration Plot
*==============================================================*

* Predict fitted probabilities from full model
predict phat_bdlift, mu
label var phat_bdlift "Predicted Pr(autonomy) from full model"

* ROC curve
roctab autonomy_say phat_bdlift, graph ///
    title("ROC Curve: Predicting Women's Autonomy (BD-LIFT Model)") ///
    ytitle("Sensitivity (True Positive Rate)") ///
    xtitle("1 - Specificity (False Positive Rate)") ///
    legend(off)

* Calibration plot
xtile decile_bdlift = phat_bdlift, n(10)
egen mean_phat_bdlift     = mean(phat_bdlift),     by(decile_bdlift)
egen mean_observed_bdlift = mean(autonomy_say),    by(decile_bdlift)

twoway (scatter mean_observed_bdlift mean_phat_bdlift, msymbol(circle)) ///
       (line mean_observed_bdlift mean_phat_bdlift, sort), ///
       title("Calibration Plot: Women's Autonomy (BD-LIFT Model)") ///
       xtitle("Predicted Probability") ///
       ytitle("Observed Proportion") ///
       legend(off)

*==============================================================*
* PART D: Predictive Margins and Marginsplots
*==============================================================*

* Re-run full model to ensure estimates in memory
melogit autonomy_say i.bdlift##i.residence ///
                     i.wealth i.education i.age i.is_working ///
                     [pweight=wt] if complete_case, ///
    || cluster_id:, or

* Margins for BD-LIFT levels
margins, at(bdlift=(0 1 2 3))
marginsplot, ///
    ytitle("Pr(Women Decision-making in Personal Health)") ///
    xtitle("BD-LIFT Index (0–3)") ///
    title("BD-LIFT Index and Women's Autonomy in Health") ///
    xlabel(0(1)3, angle(45) labsize(small))

* Margins over education
margins, at(education=(0 1 2 3 4 5))
marginsplot, ///
    ytitle("Pr(Women Decision-making in Personal Health)") ///
    xtitle("Education levels") ///
    title("Education and Women's Autonomy in Health") ///
    xlabel(0(1)5, angle(45) labsize(small))

* Margins: Education × BD-LIFT (by BD-LIFT)
margins education, at(bdlift=(0 1 2 3))
marginsplot, ///
    by(bdlift) ///
    ytitle("Pr(Women Decision-making in Personal Health)") ///
    xtitle("Education levels") ///
    title("Education × BD-LIFT and Women's Autonomy") ///
    xlabel(0(1)5, angle(45) labsize(small))

* Margins: Age × BD-LIFT
margins age, at(bdlift=(0 1 2 3))
marginsplot, ///
    by(bdlift) ///
    ytitle("Pr(Women Decision-making in Personal Health)") ///
    xtitle("Age group") ///
    title("Age × BD-LIFT and Women's Autonomy") ///
    xlabel(1(1)4, angle(45) labsize(small))

* Margins: Working status × BD-LIFT
margins is_working, at(bdlift=(0 1 2 3))
marginsplot, ///
    by(bdlift) ///
    ytitle("Pr(Women Decision-making in Personal Health)") ///
    xtitle("Employment status") ///
    title("Employment × BD-LIFT and Women's Autonomy") ///
    xlabel(0 "Not working" 1 "Working", angle(45) labsize(small))

********************************************************************
* End of replication script
********************************************************************/
